{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## <font color='green'> Binary Classification (Dense Matrix) <font>"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### <font color='green'> 1. Description<font>\n",
    "\n",
    "This program shows creditcard fraud detection using decision tree. Please download the data from https://www.kaggle.com/mlg-ulb/creditcardfraud manually (registration required) and place the file (creditcard.csv) in `datasets` directory.\n",
    "\n",
    "The datasets contains transactions made by credit cards in September 2013 by european cardholders. This dataset presents transactions that\n",
    "occurred in two days, where we have 492 frauds out of 284,807 transactions. It contains only numerical input variables which are the\n",
    "result of a PCA transformation."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### <font color='green'> 2. Data Preprocessing<font>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Not Fraud    284315\n",
      "Fraud           492\n",
      "Name: Class, dtype: int64\n"
     ]
    }
   ],
   "source": [
    "# prepare data\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "df = pd.read_csv('./datasets/creditcard.csv')\n",
    "class_names = {0:'Not Fraud', 1:'Fraud'}\n",
    "print(df.Class.value_counts().rename(index = class_names))\n",
    "\n",
    "data_features = df.drop(['Time', 'Class'], axis=1).values\n",
    "data_target = df['Class'].values\n",
    "\n",
    "from sklearn.model_selection import train_test_split\n",
    "np.random.seed(123)\n",
    "X_train, X_test, y_train, y_test = train_test_split(data_features, data_target, train_size=0.70, test_size=0.30, random_state=1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### <font color='green'> 3. Implementation using Frovedis<font>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "train time: 0.211 sec\n"
     ]
    }
   ],
   "source": [
    "# train\n",
    "import os, time\n",
    "from frovedis.exrpc.server import FrovedisServer\n",
    "from frovedis.mllib.tree import DecisionTreeClassifier as frovDecisionTreeClassifier\n",
    "FrovedisServer.initialize(\"mpirun -np 8 {}\".format(os.environ['FROVEDIS_SERVER']))\n",
    "\n",
    "model = frovDecisionTreeClassifier(max_depth=8)\n",
    "t1 = time.time()\n",
    "model.fit(X_train, y_train)\n",
    "t2 = time.time()\n",
    "print (\"train time: {:.3f} sec\".format(t2-t1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "confusion matrix:\n",
      "[[85292    16]\n",
      " [   34   101]]\n",
      "Accuracy: 99.94%\n",
      "Recall : 0.75\n",
      "F1 Score : 0.8\n"
     ]
    }
   ],
   "source": [
    "# predict\n",
    "from sklearn.metrics import confusion_matrix\n",
    "from sklearn.metrics import f1_score, recall_score\n",
    "pred = model.predict(X_test)\n",
    "cmat = confusion_matrix(y_test, pred)\n",
    "tpos = cmat[0][0]\n",
    "fneg = cmat[1][1]\n",
    "fpos = cmat[0][1]\n",
    "tneg = cmat[1][0]\n",
    "f1Score = round(f1_score(y_test, pred), 2)\n",
    "recallScore = round(recall_score(y_test, pred), 2)\n",
    "print('confusion matrix:')\n",
    "print(cmat)\n",
    "print('Accuracy: '+ str(np.round(100*float(tpos+fneg)/float(tpos+fneg + fpos + tneg),2))+'%')\n",
    "print(\"Recall : {recall_score}\".format(recall_score = recallScore))\n",
    "print(\"F1 Score : {f1_score}\".format(f1_score = f1Score))\n",
    "\n",
    "FrovedisServer.shut_down()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### <font color='green'> 3. Implementation using scikit-learn<font>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "train time: 5.762 sec\n"
     ]
    }
   ],
   "source": [
    "# train\n",
    "import os, time\n",
    "from sklearn.tree import DecisionTreeClassifier as skDecisionTreeClassifier\n",
    "\n",
    "model = skDecisionTreeClassifier(max_depth=8)\n",
    "t1 = time.time()\n",
    "model.fit(X_train, y_train)\n",
    "t2 = time.time()\n",
    "print (\"train time: {:.3f} sec\".format(t2-t1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "confusion matrix:\n",
      "[[85295    13]\n",
      " [   37    98]]\n",
      "Accuracy: 99.94%\n",
      "Recall : 0.73\n",
      "F1 Score : 0.8\n"
     ]
    }
   ],
   "source": [
    "# predict\n",
    "from sklearn.metrics import confusion_matrix\n",
    "from sklearn.metrics import f1_score, recall_score\n",
    "pred = model.predict(X_test)\n",
    "cmat = confusion_matrix(y_test, pred)\n",
    "tpos = cmat[0][0]\n",
    "fneg = cmat[1][1]\n",
    "fpos = cmat[0][1]\n",
    "tneg = cmat[1][0]\n",
    "f1Score = round(f1_score(y_test, pred), 2)\n",
    "recallScore = round(recall_score(y_test, pred), 2)\n",
    "print('confusion matrix:')\n",
    "print(cmat)\n",
    "print('Accuracy: '+ str(np.round(100*float(tpos+fneg)/float(tpos+fneg + fpos + tneg),2))+'%')\n",
    "print(\"Recall : {recall_score}\".format(recall_score = recallScore))\n",
    "print(\"F1 Score : {f1_score}\".format(f1_score = f1Score))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
